#!/usr/bin/env python
# encoding: utf-8
"""
@author: youfeng
@email: youfeng243@163.com
@license: Apache Licence
@file: copy_zhejiang_company.py
@time: 2018/1/2 10:43
"""

import sys

sys.path.append('..')
sys.path.append('../..')
from common import util
from config.app_conf import PROVINCE_REGISTER_CODE

from common.mongo import MongDb
from logger import Logger

# Connection settings for the database that main() reads records from.
# NOTE(review): the credentials are hard-coded in source control; consider
# loading them from configuration instead.
MONGO_DB_SOURCE = dict(
    host='172.16.215.2',
    port=40042,
    db='schedule_data',
    username='work',
    password='haizhi',
)

# Connection settings for the database that main() writes records to.
# Same server and account as the source; only the database name differs.
MONGO_DB_TARGET = dict(
    host='172.16.215.2',
    port=40042,
    db='company_data',
    username='work',
    password='haizhi',
)

# Logging module
log = Logger('copy_zhejiang_company.log').get_logger()


def _open_mongo(conf):
    """Build a MongDb handle from one of the connection-settings dicts."""
    return MongDb(conf['host'], conf['port'], conf['db'],
                  conf['username'], conf['password'], log=log)


# Handle on the database the records are read from.
source_db = _open_mongo(MONGO_DB_SOURCE)

# Handle on the database the records are written to.
target_db = _open_mongo(MONGO_DB_TARGET)


# `basestring` only exists on Python 2; fall back to `str` so the type check
# in cal_province() does not raise NameError on Python 3.
try:
    _STRING_TYPES = basestring
except NameError:
    _STRING_TYPES = str


def cal_province(register_code):
    """Work out the province for a company registration code.

    The province is looked up in ``PROVINCE_REGISTER_CODE`` using a
    two-character prefix whose position depends on the code length:

    * 15-character codes: characters 1-2 (``register_code[0:2]``)
    * 18-character codes: characters 3-4 (``register_code[2:4]``)

    :param register_code: registration code; anything that is not a string
        is treated as unknown.
    :return: the mapped province name, or ``'gsxt'`` when the value is not a
        string, has any other length, or its prefix is not in the table.
    """
    if not isinstance(register_code, _STRING_TYPES):
        return 'gsxt'

    code_length = len(register_code)
    if code_length == 15:
        prefix = register_code[0:2]
    elif code_length == 18:
        prefix = register_code[2:4]
    else:
        # Unsupported length: no province prefix can be extracted.
        return 'gsxt'

    if prefix in PROVINCE_REGISTER_CODE:
        return PROVINCE_REGISTER_CODE[prefix]
    return 'gsxt'


def main():
    """Copy Zhejiang companies from the source collection to the target one.

    Traverses ``gsxt_app_all_names`` in the source database for documents
    whose ``ESTDATE`` is greater than or equal to the cut-off string, keeps
    those whose ``UNISCID`` resolves to ``'zhejiang'`` via ``cal_province``,
    and inserts them into ``zhejiang_2017_company`` in the target database in
    batches of 500. Progress is logged every 1000 scanned documents.
    """
    source_table_name = 'gsxt_app_all_names'
    target_table_name = 'zhejiang_2017_company'
    batch_size = 500
    progress_step = 1000

    # NOTE(review): ESTDATE is compared as a string, which only orders
    # correctly if month and day are zero-padded in the data -- confirm.
    query = {'ESTDATE': {'$gte': '2016年12月31日'}}
    fields = ['_id', 'ESTDATE', 'UNISCID']

    pending = []
    records = source_db.traverse_batch_field(source_table_name, query, fields)
    for count, item in enumerate(records, 1):
        # Report progress before the province filter: placed after it, the
        # `continue` below would skip the message whenever the Nth scanned
        # record is not a Zhejiang company.
        if count % progress_step == 0:
            log.info("当前复制进度: count = {}".format(count))

        register_code = item.get('UNISCID')
        if cal_province(register_code) != 'zhejiang':
            continue

        pending.append({
            '_id': item.get('_id'),
            'register_date': item.get("ESTDATE"),
            'register_code': register_code,
            '_in_time': util.get_now_time()
        })
        if len(pending) >= batch_size:
            target_db.insert_batch_data(target_table_name, pending)
            # Clear in place so the same list object is reused per batch.
            del pending[:]

    # Flush whatever is left over from the last, partially filled batch.
    if pending:
        target_db.insert_batch_data(target_table_name, pending)
        del pending[:]
    log.info("复制完成...")


# Run the copy only when this file is executed as a script; a plain import
# still executes the module-level setup above but does not call main().
if __name__ == '__main__':
    main()
